library(tidyverse)
## ── Attaching packages ─────────────────────────────────────── tidyverse 1.2.1 ──
## ✔ ggplot2 3.1.0 ✔ purrr 0.3.0
## ✔ tibble 2.0.1 ✔ dplyr 0.8.0.1
## ✔ tidyr 0.8.2 ✔ stringr 1.4.0
## ✔ readr 1.3.1 ✔ forcats 0.4.0
## ── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
## ✖ dplyr::filter() masks stats::filter()
## ✖ dplyr::lag() masks stats::lag()
library(tseries) #2
library(forecast)#2
library(readxl)#2
library(sf) #3
## Linking to GEOS 3.6.1, GDAL 2.1.3, PROJ 4.9.3
library(leaflet) #3
library(ggplot2)#4
truckee_flow <- read_csv("truckee_flow.csv") #read in data
#remove unnecessary columns
truckee_df <- truckee_flow %>%
select(year_nu, mean_va) %>%
rename(year = year_nu, cfs=mean_va)
Convert to ‘df_flow’ to time-series data
truckee_ts <- ts(truckee_df$cfs, frequency = 12, start = c(2000,1))
Plot ‘tru_ts’
plot(truckee_ts)
Decompose the plot for further data exploration
truckee_dc <- decompose(truckee_ts)
plot(truckee_dc)
The time-series data for streamflow in the Truckee River displays a seasonal pattern. Streamflows are lower in the beginning of each year and steadily increase until they peak a few months later, then decreases throughout the rest of the year with minor fluctuations. The peaks in each year tend to vary and is likely due to variations in snowpack and other precipitation events.
Holt-Winters
truckee_hw <-HoltWinters(truckee_ts) #Computes Holt-Winters Filtering of a given time series. Unknown parameters are determined by minimizing the squared prediction error.
#tru_hw # gives more weight to more recent observations
plot(truckee_hw)
5-year forecast of Truckee River flow
truckee_fc <- forecast(truckee_hw, h = 60) # 5 year forecast... h = 60 months or 5 years
plot(truckee_fc)
Visualize model residuals
hist(truckee_fc$residuals)# residuals look normally distributed so model is a good fit... aka not over/under estimating
#Task 3
nps<-read_sf(dsn = ".", layer = "nps_boundary") #read in nps data
nps_ca<-nps %>%
filter(STATE=="CA") #filter for state
nps_ca$UNIT_TYPE<-as.factor(nps_ca$UNIT_TYPE) #convert park type to factor
factpal <- colorFactor(topo.colors(8), nps_ca$UNIT_TYPE) #choosing palette for polygon golors.
#I chose to use the park type as the color, however this can be by individual park, or whatever you choose
nps_ca_map <-nps_ca %>%
leaflet() %>%
addTiles() %>%
addPolygons(fillColor = ~factpal(UNIT_TYPE), #color of each polygon
fillOpacity = 1, #the opacity of the polygons
stroke = TRUE, #whether to outline the park boundaries,
color = "black",#color of polygon boundaries
weight = 0.2, #stroke width in pixels
opacity = 1, #stroke opacity
label = nps_ca$PARKNAME, #label of park when you hover over it
highlightOptions = highlightOptions(color = "white", weight = 5, bringToFront = TRUE),
labelOptions = labelOptions(style = list("font-weight" = "normal", padding = "3px 8px"),
textsize = "15px",direction = "auto")) %>% #how the park name shows up when you hover over it
addLegend("bottomright", pal = factpal, values = ~UNIT_TYPE,
title = "Park Classification",
opacity = 1)
nps_ca_map
lizard<-read_csv("lter_lizard_pitfall.csv")
## Warning: Missing column names filled in: 'X2' [2], 'X3' [3], 'X4' [4],
## 'X5' [5], 'X6' [6], 'X7' [7], 'X8' [8], 'X9' [9], 'X10' [10], 'X11' [11],
## 'X12' [12], 'X13' [13], 'X14' [14]
## Parsed with column specification:
## cols(
## `File Name: JornadaStudy_007_npp_lizard_pitfall_trap_data.csv Last Update: 01/08/2013 By: John Anderson` = col_character(),
## X2 = col_character(),
## X3 = col_character(),
## X4 = col_character(),
## X5 = col_character(),
## X6 = col_character(),
## X7 = col_character(),
## X8 = col_character(),
## X9 = col_character(),
## X10 = col_character(),
## X11 = col_character(),
## X12 = col_character(),
## X13 = col_character(),
## X14 = col_character()
## )
#data wrangling
colnames(lizard) = lizard[47, ] #assigning column names
lizard<-lizard[-c(1:47), ] #taking out unnecessary rows
lizard_df<-lizard %>%
filter(rcap!="R") %>% #taking out repeat individuals
filter(site=="CALI")#taking out unnecessary sites
#male vs female weight (g)
lizard_weight<-lizard_df %>%
filter(sex=="F"|sex=="M") %>%
filter(weight!= ".") %>%
#group_by(sex) %>%
mutate(weight=as.numeric(weight)) %>%
mutate(sex=as.factor(sex))
library(plyr)
## -------------------------------------------------------------------------
## You have loaded plyr after dplyr - this is likely to cause problems.
## If you need functions from both plyr and dplyr, please load plyr first, then dplyr:
## library(plyr); library(dplyr)
## -------------------------------------------------------------------------
##
## Attaching package: 'plyr'
## The following objects are masked from 'package:dplyr':
##
## arrange, count, desc, failwith, id, mutate, rename, summarise,
## summarize
## The following object is masked from 'package:purrr':
##
## compact
mu <- ddply(lizard_weight, "sex", summarise, grp.mean=mean(weight))
ggplot(lizard_weight, aes(x=weight, color=sex))+
geom_histogram(fill="white", alpha=0.5, position="identity")+
geom_vline(data=mu, aes(xintercept=grp.mean, color=sex),
linetype="dashed")+
theme(legend.position="top")
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.
with(lizard_weight, shapiro.test(weight[sex == "F"]))
##
## Shapiro-Wilk normality test
##
## data: weight[sex == "F"]
## W = 0.58543, p-value = 7.423e-12
with(lizard_weight, shapiro.test(weight[sex == "M"]))
##
## Shapiro-Wilk normality test
##
## data: weight[sex == "M"]
## W = 0.58315, p-value = 8.465e-11
#res.ftest <- var.test(weight ~ group, data = lizard_weight)
#res.ftest